/**************************************************************************
	Replication do-file: "Corruption in Customs"
	Cyril Chalendard, Ana Fernandes, Gael Raballand and Bob Rijkers
	
	Created on: 01/07/2022
**************************************************************************/

clear all
set more off, perm
cap log close
cls

* ----------------------- DIRECTORIES AND FOLDERS ----------------------- *

* Directories in which data are stored
global main "PUT YOUR DIRECTORY PATH HERE"
cd "$folder"

* Folders
global outputdata = "$main\Output Data"
cap mkdir "$main\Tables"
global tables = "$main\Tables"

* ----------------------------- BEGINS HERE ----------------------------- *

* -------
* Dataset
* -------
use "$outputdata\CFRR.dta", clear // open dataset

* New variables
** Indicators of revenue loss
*** Revenue losses over all declarations - each of the numbers entering the formulas is a coefficient from panel B of Table A22
gen li = tot_taxes_usd*exp(0.732*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
gen liw = tot_taxes_usd*exp(1.112*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
gen le = tot_taxes_usd*exp(1.659*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
gen lew = tot_taxes_usd*exp(2.085*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
gen lvt = tot_taxes_usd*exp(0.851*eis_f) - tot_taxes_usd if sample_reg==1 & eis_f!=.
*** Revenue losses over declarations with significant excess interaction - each of the numbers entering the formulas is a coefficient from panel B of Table A22
gen sli = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(0.732*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen sliw = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(1.112*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen sle = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(1.659*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen slew = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(2.085*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
gen slvt = cond((sample_reg==1 & eis_f!=. & sig_c_f==1), (tot_taxes_usd*exp(0.851*eis_f) - tot_taxes_usd), cond((sample_reg==1 & eis_f!=.), 0, .))
** Revenue loss by inspector and semester
foreach var of varlist tot_taxes_usd li liw le lew lvt sli sliw sle slew slvt {
	replace `var' = `var'/1000

	* Total
	egen t_is_`var' = sum(`var') if sample_reg==1 & eis_f!=. , by(is) // total for each inspector-semester pair
	egen t_bs_`var' = sum(`var') if sample_reg==1 & eis_f!=. , by(bs) // total for each broker-semester pair

	* Average
	/* Create auxiliary variables to make sure the variable is always defined even when significant excess interaction is 0. */
	egen AUXm_is_`var' = mean(`var') if sample_reg==1 & eis_f!=. , by(is)
	egen m_is_`var' = max(AUXm_is_`var') if sample_reg==1 & eis_f!=. , by(is)
	drop AUXm_is_`var'
	egen AUXm_bs_`var' = mean(`var') if sample_reg==1 & eis_f!=. , by(bs)
	egen m_bs_`var' = max(AUXm_bs_`var') if sample_reg==1 & eis_f!=. , by(bs)
	drop AUXm_bs_`var'

	* Share 
	/* First compute total by semester, then create the shares. */
	egen st_`var' = sum(`var') if sample_reg==1 & eis_f!=. , by(nc_sem)
	gen p_is_`var' = t_is_`var'/st_`var'
	gen p_bs_`var' = t_bs_`var'/st_`var'				
	replace `var' = `var'*1000
}
cap drop n
gen n = 1


* -------
* Table 5
* -------

* Panel A
* -------
* Matrix
mat table5_1N = J(9,2,.)
mat table5_1P = J(9,2,.)

* Dataset
preserve
** Sample
drop if eis_f==. // define sample
keep if sample_reg==1 & i_m_excess!=. // keep relevant observations
drop if nc_sem>10 // define sample
duplicates drop is, force // drop duplicates by inspector-semester
** New variables
gen minp_is_slew = -p_is_slew // create inverse
bys nc_sem: egen rank = rank(minp_is_slew), unique // rank inverse by semester
/* One issue is that there are many inspectors with the same rank since some of them do not participate in the scheme. For each of these inspectors we set their contributions equal to the average tax share and % contribution to total taxes of the average inspector that did not have excess interaction that semester. Note we don't have to "average" revenue losses due to corruption as those are zero by construction. */
egen mi0tax = mean(t_is_tot_taxes_usd) if i_m_excess==0 , by(nc_sem)
egen mi0p_is_tp = mean(p_is_tot_taxes_usd) if i_m_excess==0 , by(nc_sem)
gen It_is_tp = cond((i_m_excess==0 & sample_reg==1 & eis_f!=.), mi0tax, cond((sample_reg==1 & eis_f!=.), t_is_tot_taxes_usd, .))
gen Ip_is_tp = cond((i_m_excess==0 & sample_reg==1 & eis_f!=.), mi0p_is_tp, cond((sample_reg==1 & eis_f!=.), p_is_tot_taxes_usd, .))
foreach var of varlist t_is_tot_taxes_usd t_is_slew p_is_tot_taxes_usd p_is_slew It_is_tp Ip_is_tp {
	gen `var'6 = `var'/6
} // Loop: divide by 6 to get average per semester
gen Grankl = 1 + irecode(rank, 1,2,3,4,5,10, .) // create rank
label define Grankl 1 "1" 2 "2" 3 "3" 4 "4" 5 "5"  6 "6-10" 7 "11 and higher"
label values Grankl Grankl

* Loop for aggregated ranking
** Totals
local vars `" "It_is_tp6" "t_is_slew6" "'
forvalues i = 1(1)2 {

	local x: word `i' of `vars'
	
	tabstat `x', by(Grankl) stats(sum) save // capture summary statistics
	
	forvalues j = 1(1)8 {
		mat table5_1N[`j',`i'] = r(Stat`j')
	}	
}
** Percentages
local vars `" "Ip_is_tp6" "p_is_slew6" "'
forvalues i = 1(1)2 {

	local x: word `i' of `vars'
	
	tabstat `x', by(Grankl) stats(sum) save // capture summary statistics
	
	forvalues j = 1(1)8 {
		mat table5_1P[`j',`i'] = r(Stat`j')
	}	
}

* Loop for averages
** Totals
local i = 1
foreach x of varlist t_is_tot_taxes_usd t_is_slew {
	
	tabstat `x', by(rank) save // distribution by rank
	mat table5_1N[9,`i'] = r(StatTotal)
	local i = `i'+1
}
** Percentages	
local i = 1
foreach x of varlist Ip_is_tp p_is_slew {
	
	tabstat `x', by(rank) save // distribution by rank
	mat table5_1P[9,`i'] = r(StatTotal)
	local i = `i'+1
}

restore


* Panel B
* -------
* Matrix
mat table5_2N = J(10,2,.)
mat table5_2P = J(10,2,.)

* Dataset
preserve
** Sample
drop if eis_f==. // define sample
keep if sample_reg==1 & b_m_excess!=. // keep necessary observations
drop if nc_sem>10 // define sample
duplicates drop bs, force // drop duplicates by broker-semester
** New variables
gen minp_bs_slew = -p_bs_slew // create inverse
bys nc_sem: egen rank = rank(minp_bs_slew), unique // rank inverse by semester
/* One issue is that some brokers do not participate in the scheme. For each of these brokers we set their contributions equal to the average tax share and % contribution to total taxes of the average inspector that did not have excess interaction that semester. Note we don't have to "average" revenue losses due to corruption as those are zero by construction. */
egen mb0tax = mean(t_bs_tot_taxes_usd) if b_m_excess==0 , by(nc_sem)
egen mb0p_bs_tp = mean(p_bs_tot_taxes_usd) if b_m_excess==0 , by(nc_sem)
gen Bt_bs_tp = cond((b_m_excess==0 & sample_reg==1 & eis_f!=.), mb0tax, cond((sample_reg==1 & eis_f!=.), t_bs_tot_taxes_usd, .))
gen Bp_bs_tp = cond((b_m_excess==0 & sample_reg==1 & eis_f!=.), mb0p_bs_tp, cond((sample_reg==1 & eis_f!=.), p_bs_tot_taxes_usd, .))
foreach var of varlist t_bs_tot_taxes_usd t_bs_slew p_bs_tot_taxes_usd p_bs_slew Bt_bs_tp Bp_bs_tp {
	gen `var'6 = `var'/6
} // Loop: divide by 6 to get average per semester
gen Grankb = 1 + irecode(rank, 1,2,3,4,5,10, 20, .) // create rank
label define Grankb 1 "1" 2 "2" 3 "3" 4 "4" 5 "5"  6 "6-10" 7 "11-20" 8 "21 and higher"
label values Grankb Grankb

* Loop for aggregated ranking
** Totals
local vars `" "Bt_bs_tp6" "t_bs_slew6" "'
forvalues i = 1(1)2 {

	local x: word `i' of `vars'
	
	tabstat `x', by(Grankb) stats(sum) save // capture summary statistics
	
	forvalues j = 1(1)8 {
		mat table5_2N[`j',`i'] = r(Stat`j')
	}
}
** Percentages
local vars `" "Bp_bs_tp6" "p_bs_slew6" "'
forvalues i = 1(1)2 {

	local x: word `i' of `vars'
	
	tabstat `x', by(Grankb) stats(sum) save // capture summary statistics
	
	forvalues j = 1(1)8 {
		mat table5_2P[`j',`i'] = r(Stat`j')
	}
}

* Loop for averages
** Totals
local i = 1
foreach x of varlist t_bs_tot_taxes_usd t_bs_slew {
	
	tabstat `x', by(rank) save // distribution by rank
	mat table5_2N[10,`i'] = r(StatTotal)
	local i = `i'+1
}
** Percentages	
local i = 1
foreach x of varlist p_bs_tot_taxes_usd p_bs_slew {
	
	tabstat `x', by(rank) save // distribution by rank
	mat table5_2P[10,`i'] = r(StatTotal)
	local i = `i'+1
}

restore


* Panel C
* -------
* Matrix
mat table5_3N = J(1,2,.)
mat table5_3P = J(1,2,.)

* Dataset
preserve

* New variables
gen tp6 = tot_taxes_usd/6
gen slew6 = slew/6

* Loop for aggregated ranking
** Totals
local i = 1
foreach x of varlist tp6 slew6 {
	
	tabstat `x' if sample_reg==1 & eis_f!=., stats(sum) save // capture summary statistics
	mat table5_3N[1,`i'] = r(StatTotal)
	local i = `i'+1
}
** Percentages
*** Column 3: sum of percentages
forvalues i = 1(1)2 {
	dis table5_2P[1,`i'] + table5_2P[2,`i'] + table5_2P[3,`i'] + table5_2P[4,`i'] + table5_2P[5,`i'] + table5_2P[6,`i'] + table5_2P[7,`i'] + table5_2P[8,`i']
} // Loop: checking percentages add up to 100%
tempvar totC3
gen `totC3' = table5_2P[1,2] + table5_2P[2,2] + table5_2P[3,2] + table5_2P[4,2] + table5_2P[5,2] + table5_2P[6,2] + table5_2P[7,2] + table5_2P[8,2]
qui: sum `totC3' // capture summary statistics
mat table5_3P[1,1] = `r(max)'
*** Column 4: calculating how much higher tax yield could be
mat table5_3P[1,2] = table5_3N[1,2]/table5_3N[1,1]

restore


* Export
* ------
putexcel set "$tables\Table 5.xlsx", replace sheet(Table5) // create a new excel

* Dependent variables
putexcel (A1:A2), merge hcenter vcenter
putexcel A1 = "Rank per semester", left vcenter
local cells `" "B" "C" "D" "E" "'
local labels `" "Total taxes collected" "Tax losses" "% total taxes collected" "% total tax losses" "'
forvalues i = 1(1)4 {

	local x: word `i' of `labels'
	local y: word `i' of `cells'

	putexcel `y'1 = "`x'", hcenter vcenter bold	
}
local cells `" "B" "C" "D" "E" "'
local labels `" "average per semester (USD)" "average per semester (USD)" "per semester" "per semester" "'
forvalues i = 1(1)4 {

	local x: word `i' of `labels'
	local y: word `i' of `cells'

	putexcel `y'2 = "`x'", hcenter vcenter
}

* Panels
** Name
local y = 4
local z = 5
local labels1 `" "Panel A: By inspector" "Panel B: By broker" "'
local labels2 `" "(ranked in terms of tax revenue losses, from largest to smallest, by semester)" "(ranked in terms of tax revenue losses, from largest to smallest, by semester)" "'
forvalues i = 1(1)2 {

	local w: word `i' of `labels1'
	local x: word `i' of `labels2'

	putexcel (A`y':E`y'), merge hcenter vcenter bold
	putexcel A`y' = "`w'"
	putexcel (A`z':E`z'), merge hcenter vcenter
	putexcel A`z' = "`x'"
	local y = `y'+13
	local z = `z'+13
}
putexcel (A31:E31), merge hcenter vcenter bold
putexcel A31 = "Panel C: Overall"
** Idependent variables
local x = 7
local y = 20
forvalues i = 1(1)5 {
	putexcel A`x' = "`i'", left vcenter
	putexcel A`y' = "`i'", left vcenter
	local x = `x'+1
	local y = `y'+1
}
local cells `" "6" "12" "13" "15" "19" "25" "26" "27" "29" "32" "'
local labels `" "Inspector rank (in a given semester)" "Rank 6-10 (combined)" "Rank 11 and higher (combined)" "Average per inspector per semester" "Broker rank (in a given semester)" "Rank 6-10 (combined)" "Rank 10-20 (combined)" "Rank 21 and higher (combined)" "Average by broker per semester" "Total per semester" "'
forvalues i = 1(1)10 {

	local x: word `i' of `labels'
	local y: word `i' of `cells'

	putexcel A`y' = "`x'", left vcenter
}
** Coefficients
local cells `" "7" "20" "32"'
forvalues i = 1(1)3 {
	
	local x: word `i' of `cells'

	putexcel B`x' = matrix(table5_`i'N), nformat(number_sep_d2) hcenter vcenter
	putexcel D`x' = matrix(table5_`i'P), nformat(0.0%) hcenter vcenter
}
putexcel B32 = matrix(table5_3N), nformat(number_sep) hcenter vcenter
putexcel D32 = matrix(table5_3P), nformat(0.0%) hcenter vcenter


* -------------------------------- ENDS HERE -------------------------------- *